View Javadoc

1   // DeflaterHuffman.java, created Mon Jul  8  4:06:18 2002 by joewhaley
2   // Copyright (C) 2001-3 John Whaley <jwhaley@alum.mit.edu>
3   // Licensed under the terms of the GNU LGPL; see COPYING for details.
4   package joeq.ClassLib.Common.java.util.zip;
5   
6   /***
7    * DeflaterHuffman
8    *
9    * @author  John Whaley <jwhaley@alum.mit.edu>
10   * @version $Id: DeflaterHuffman.java 1451 2004-03-09 06:27:08Z jwhaley $
11   */
12  class DeflaterHuffman {
13  
14      private static final int BUFSIZE =
15          1 << (DeflaterConstants.DEFAULT_MEM_LEVEL + 6);
16      private static final int LITERAL_NUM = 286;
17      private static final int DIST_NUM = 30;
18      private static final int BITLEN_NUM = 19;
19      private static final int REP_3_6 = 16;
20      private static final int REP_3_10 = 17;
21      private static final int REP_11_138 = 18;
22      private static final int EOF_SYMBOL = 256;
23      private static final int[] BL_ORDER =
24          { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
25  
26      private static final String bit4Reverse =
27          "\000\010\004\014\002\012\006\016\001\011\005\015\003\013\007\017";
28  
29      class Tree {
30          short[] freqs;
31          short[] codes;
32          byte[] length;
33          int[] bl_counts;
34          int minNumCodes, numCodes;
35          int maxLength;
36  
37          Tree(int elems, int minCodes, int maxLength) {
38              this.minNumCodes = minCodes;
39              this.maxLength = maxLength;
40              freqs = new short[elems];
41              bl_counts = new int[maxLength];
42          }
43  
44          void reset() {
45              for (int i = 0; i < freqs.length; i++)
46                  freqs[i] = 0;
47              codes = null;
48              length = null;
49          }
50  
51          final void writeSymbol(int code) {
52              if (DeflaterConstants.DEBUGGING) {
53                  freqs[code]--;
54                  //System.err.print("writeSymbol("+freqs.length+","+code+"): ");
55              }
56              pending.writeBits(codes[code] & 0xffff, length[code]);
57          }
58  
59          final void checkEmpty() {
60              boolean empty = true;
61              for (int i = 0; i < freqs.length; i++)
62                  if (freqs[i] != 0) {
63                      System.err.println("freqs[" + i + "] == " + freqs[i]);
64                      empty = false;
65                  }
66              if (!empty)
67                  throw new InternalError();
68              System.err.println("checkEmpty suceeded!");
69          }
70  
71          void setStaticCodes(short[] stCodes, byte[] stLength) {
72              codes = stCodes;
73              length = stLength;
74          }
75  
76          public void buildCodes() {
77              int[] nextCode = new int[maxLength];
78              int code = 0;
79              codes = new short[freqs.length];
80  
81              if (DeflaterConstants.DEBUGGING)
82                  System.err.println("buildCodes: " + freqs.length);
83              for (int bits = 0; bits < maxLength; bits++) {
84                  nextCode[bits] = code;
85                  code += bl_counts[bits] << (15 - bits);
86                  if (DeflaterConstants.DEBUGGING)
87                      System.err.println(
88                          "bits: "
89                              + (bits + 1)
90                              + " count: "
91                              + bl_counts[bits]
92                              + " nextCode: "
93                              + Integer.toHexString(code));
94              }
95              if (DeflaterConstants.DEBUGGING && code != 65536)
96                  throw new RuntimeException("Inconsistent bl_counts!");
97  
98              for (int i = 0; i < numCodes; i++) {
99                  int bits = length[i];
100                 if (bits > 0) {
101                     if (DeflaterConstants.DEBUGGING)
102                         System.err.println(
103                             "codes["
104                                 + i
105                                 + "] = rev("
106                                 + Integer.toHexString(nextCode[bits - 1])
107                                 + "),"
108                                 + bits);
109                     codes[i] = bitReverse(nextCode[bits - 1]);
110                     nextCode[bits - 1] += 1 << (16 - bits);
111                 }
112             }
113         }
114 
115         private void buildLength(int childs[]) {
116             this.length = new byte[freqs.length];
117             int numNodes = childs.length / 2;
118             int numLeafs = (numNodes + 1) / 2;
119             int overflow = 0;
120 
121             for (int i = 0; i < maxLength; i++)
122                 bl_counts[i] = 0;
123 
124             /* First calculate optimal bit lengths */
125             int lengths[] = new int[numNodes];
126             lengths[numNodes - 1] = 0;
127             for (int i = numNodes - 1; i >= 0; i--) {
128                 if (childs[2 * i + 1] != -1) {
129                     int bitLength = lengths[i] + 1;
130                     if (bitLength > maxLength) {
131                         bitLength = maxLength;
132                         overflow++;
133                     }
134                     lengths[childs[2 * i]] =
135                         lengths[childs[2 * i + 1]] = bitLength;
136                 } else {
137                     /* A leaf node */
138                     int bitLength = lengths[i];
139                     bl_counts[bitLength - 1]++;
140                     this.length[childs[2 * i]] = (byte) lengths[i];
141                 }
142             }
143 
144             if (DeflaterConstants.DEBUGGING) {
145                 System.err.println("Tree " + freqs.length + " lengths:");
146                 for (int i = 0; i < numLeafs; i++)
147                     System.err.println(
148                         "Node "
149                             + childs[2 * i]
150                             + " freq: "
151                             + freqs[childs[2 * i]]
152                             + " len: "
153                             + length[childs[2 * i]]);
154             }
155 
156             if (overflow == 0)
157                 return;
158 
159             int incrBitLen = maxLength - 1;
160             do {
161                 /* Find the first bit length which could increase: */
162                 while (bl_counts[--incrBitLen] == 0);
163 
164                 /* Move this node one down and remove a corresponding
165                  * amount of overflow nodes.
166                  */
167                 do {
168                     bl_counts[incrBitLen]--;
169                     bl_counts[++incrBitLen]++;
170                     overflow -= 1 << (maxLength - 1 - incrBitLen);
171                 } while (overflow > 0 && incrBitLen < maxLength - 1);
172             }
173             while (overflow > 0);
174 
175             /* We may have overshot above.  Move some nodes from maxLength to
176              * maxLength-1 in that case.
177              */
178             bl_counts[maxLength - 1] += overflow;
179             bl_counts[maxLength - 2] -= overflow;
180 
181             /* Now recompute all bit lengths, scanning in increasing
182              * frequency.  It is simpler to reconstruct all lengths instead of
183              * fixing only the wrong ones. This idea is taken from 'ar'
184              * written by Haruhiko Okumura.
185              *
186              * The nodes were inserted with decreasing frequency into the childs
187              * array.
188              */
189             int nodePtr = 2 * numLeafs;
190             for (int bits = maxLength; bits != 0; bits--) {
191                 int n = bl_counts[bits - 1];
192                 while (n > 0) {
193                     int childPtr = 2 * childs[nodePtr++];
194                     if (childs[childPtr + 1] == -1) {
195                         /* We found another leaf */
196                         length[childs[childPtr]] = (byte) bits;
197                         n--;
198                     }
199                 }
200             }
201             if (DeflaterConstants.DEBUGGING) {
202                 System.err.println("*** After overflow elimination. ***");
203                 for (int i = 0; i < numLeafs; i++)
204                     System.err.println(
205                         "Node "
206                             + childs[2 * i]
207                             + " freq: "
208                             + freqs[childs[2 * i]]
209                             + " len: "
210                             + length[childs[2 * i]]);
211             }
212         }
213 
214         void buildTree() {
215             int numSymbols = freqs.length;
216 
217             /* heap is a priority queue, sorted by frequency, least frequent
218              * nodes first.  The heap is a binary tree, with the property, that
219              * the parent node is smaller than both child nodes.  This assures
220              * that the smallest node is the first parent.
221              *
222              * The binary tree is encoded in an array:  0 is root node and
223              * the nodes 2*n+1, 2*n+2 are the child nodes of node n.
224              */
225             int[] heap = new int[numSymbols];
226             int heapLen = 0;
227             int maxCode = 0;
228             for (int n = 0; n < numSymbols; n++) {
229                 int freq = freqs[n];
230                 if (freq != 0) {
231                     /* Insert n into heap */
232                     int pos = heapLen++;
233                     int ppos;
234                     while (pos > 0 && freqs[heap[ppos =
235                         (pos - 1) / 2]] > freq) {
236                         heap[pos] = heap[ppos];
237                         pos = ppos;
238                     }
239                     heap[pos] = n;
240                     maxCode = n;
241                 }
242             }
243 
244             /* We could encode a single literal with 0 bits but then we
245              * don't see the literals.  Therefore we force at least two
246              * literals to avoid this case.  We don't care about order in
247              * this case, both literals get a 1 bit code.  
248              */
249             while (heapLen < 2) {
250                 int node = maxCode < 2 ? ++maxCode : 0;
251                 heap[heapLen++] = node;
252             }
253 
254             numCodes = Math.max(maxCode + 1, minNumCodes);
255 
256             int numLeafs = heapLen;
257             int[] childs = new int[4 * heapLen - 2];
258             int[] values = new int[2 * heapLen - 1];
259             int numNodes = numLeafs;
260             for (int i = 0; i < heapLen; i++) {
261                 int node = heap[i];
262                 childs[2 * i] = node;
263                 childs[2 * i + 1] = -1;
264                 values[i] = freqs[node] << 8;
265                 heap[i] = i;
266             }
267 
268             /* Construct the Huffman tree by repeatedly combining the least two
269              * frequent nodes.
270              */
271             do {
272                 int first = heap[0];
273                 int last = heap[--heapLen];
274 
275                 /* Propagate the hole to the leafs of the heap */
276                 int ppos = 0;
277                 int path = 1;
278                 while (path < heapLen) {
279                     if (path + 1 < heapLen
280                         && values[heap[path]] > values[heap[path + 1]])
281                         path++;
282 
283                     heap[ppos] = heap[path];
284                     ppos = path;
285                     path = path * 2 + 1;
286                 }
287 
288                 /* Now propagate the last element down along path.  Normally
289                  * it shouldn't go too deep.
290                  */
291                 int lastVal = values[last];
292                 while ((path = ppos) > 0 && values[heap[ppos =
293                     (path - 1) / 2]] > lastVal)
294                     heap[path] = heap[ppos];
295                 heap[path] = last;
296 
297                 int second = heap[0];
298 
299                 /* Create a new node father of first and second */
300                 last = numNodes++;
301                 childs[2 * last] = first;
302                 childs[2 * last + 1] = second;
303                 int mindepth =
304                     Math.min(values[first] & 0xff, values[second] & 0xff);
305                 values[last] =
306                     lastVal = values[first] + values[second] - mindepth + 1;
307 
308                 /* Again, propagate the hole to the leafs */
309                 ppos = 0;
310                 path = 1;
311                 while (path < heapLen) {
312                     if (path + 1 < heapLen
313                         && values[heap[path]] > values[heap[path + 1]])
314                         path++;
315 
316                     heap[ppos] = heap[path];
317                     ppos = path;
318                     path = ppos * 2 + 1;
319                 }
320 
321                 /* Now propagate the new element down along path */
322                 while ((path = ppos) > 0 && values[heap[ppos =
323                     (path - 1) / 2]] > lastVal)
324                     heap[path] = heap[ppos];
325                 heap[path] = last;
326             }
327             while (heapLen > 1);
328 
329             if (heap[0] != childs.length / 2 - 1)
330                 throw new RuntimeException("Weird!");
331 
332             buildLength(childs);
333         }
334 
335         int getEncodedLength() {
336             int len = 0;
337             for (int i = 0; i < freqs.length; i++)
338                 len += freqs[i] * length[i];
339             return len;
340         }
341 
342         void calcBLFreq(Tree blTree) {
343             int max_count; /* max repeat count */
344             int min_count; /* min repeat count */
345             int count; /* repeat count of the current code */
346             int curlen = -1; /* length of current code */
347 
348             int i = 0;
349             while (i < numCodes) {
350                 count = 1;
351                 int nextlen = length[i];
352                 if (nextlen == 0) {
353                     max_count = 138;
354                     min_count = 3;
355                 } else {
356                     max_count = 6;
357                     min_count = 3;
358                     if (curlen != nextlen) {
359                         blTree.freqs[nextlen]++;
360                         count = 0;
361                     }
362                 }
363                 curlen = nextlen;
364                 i++;
365 
366                 while (i < numCodes && curlen == length[i]) {
367                     i++;
368                     if (++count >= max_count)
369                         break;
370                 }
371 
372                 if (count < min_count)
373                     blTree.freqs[curlen] += count;
374                 else if (curlen != 0)
375                     blTree.freqs[REP_3_6]++;
376                 else if (count <= 10)
377                     blTree.freqs[REP_3_10]++;
378                 else
379                     blTree.freqs[REP_11_138]++;
380             }
381         }
382 
383         void writeTree(Tree blTree) {
384             int max_count; /* max repeat count */
385             int min_count; /* min repeat count */
386             int count; /* repeat count of the current code */
387             int curlen = -1; /* length of current code */
388 
389             int i = 0;
390             while (i < numCodes) {
391                 count = 1;
392                 int nextlen = length[i];
393                 if (nextlen == 0) {
394                     max_count = 138;
395                     min_count = 3;
396                 } else {
397                     max_count = 6;
398                     min_count = 3;
399                     if (curlen != nextlen) {
400                         blTree.writeSymbol(nextlen);
401                         count = 0;
402                     }
403                 }
404                 curlen = nextlen;
405                 i++;
406 
407                 while (i < numCodes && curlen == length[i]) {
408                     i++;
409                     if (++count >= max_count)
410                         break;
411                 }
412 
413                 if (count < min_count) {
414                     while (count-- > 0)
415                         blTree.writeSymbol(curlen);
416                 } else if (curlen != 0) {
417                     blTree.writeSymbol(REP_3_6);
418                     pending.writeBits(count - 3, 2);
419                 } else if (count <= 10) {
420                     blTree.writeSymbol(REP_3_10);
421                     pending.writeBits(count - 3, 3);
422                 } else {
423                     blTree.writeSymbol(REP_11_138);
424                     pending.writeBits(count - 11, 7);
425                 }
426             }
427         }
428     }
429 
430     DeflaterPending pending;
431     private Tree literalTree, distTree, blTree;
432 
433     private short d_buf[];
434     private byte l_buf[];
435     private int last_lit;
436     private int extra_bits;
437 
438     private static short staticLCodes[];
439     private static byte staticLLength[];
440     private static short staticDCodes[];
441     private static byte staticDLength[];
442 
443     /***
444      * Reverse the bits of a 16 bit value.
445      */
446     static short bitReverse(int value) {
447         return (short)
448             (bit4Reverse.charAt(value & 0xf)
449                 << 12 | bit4Reverse.charAt((value >> 4) & 0xf)
450                 << 8 | bit4Reverse.charAt((value >> 8) & 0xf)
451                 << 4 | bit4Reverse.charAt(value >> 12));
452     }
453 
454     static {
455         /* See RFC 1951 3.2.6 */
456         /* Literal codes */
457         staticLCodes = new short[LITERAL_NUM];
458         staticLLength = new byte[LITERAL_NUM];
459         int i = 0;
460         while (i < 144) {
461             staticLCodes[i] = bitReverse((0x030 + i) << 8);
462             staticLLength[i++] = 8;
463         }
464         while (i < 256) {
465             staticLCodes[i] = bitReverse((0x190 - 144 + i) << 7);
466             staticLLength[i++] = 9;
467         }
468         while (i < 280) {
469             staticLCodes[i] = bitReverse((0x000 - 256 + i) << 9);
470             staticLLength[i++] = 7;
471         }
472         while (i < LITERAL_NUM) {
473             staticLCodes[i] = bitReverse((0x0c0 - 280 + i) << 8);
474             staticLLength[i++] = 8;
475         }
476 
477         /* Distant codes */
478         staticDCodes = new short[DIST_NUM];
479         staticDLength = new byte[DIST_NUM];
480         for (i = 0; i < DIST_NUM; i++) {
481             staticDCodes[i] = bitReverse(i << 11);
482             staticDLength[i] = 5;
483         }
484     }
485 
486     public DeflaterHuffman(DeflaterPending pending) {
487         this.pending = pending;
488 
489         literalTree = new Tree(LITERAL_NUM, 257, 15);
490         distTree = new Tree(DIST_NUM, 1, 15);
491         blTree = new Tree(BITLEN_NUM, 4, 7);
492 
493         d_buf = new short[BUFSIZE];
494         l_buf = new byte[BUFSIZE];
495     }
496 
497     public final void reset() {
498         last_lit = 0;
499         extra_bits = 0;
500         literalTree.reset();
501         distTree.reset();
502         blTree.reset();
503     }
504 
505     private final int l_code(int len) {
506         if (len == 255)
507             return 285;
508 
509         int code = 257;
510         while (len >= 8) {
511             code += 4;
512             len >>= 1;
513         }
514         return code + len;
515     }
516 
517     private final int d_code(int distance) {
518         int code = 0;
519         while (distance >= 4) {
520             code += 2;
521             distance >>= 1;
522         }
523         return code + distance;
524     }
525 
526     public void sendAllTrees(int blTreeCodes) {
527         blTree.buildCodes();
528         literalTree.buildCodes();
529         distTree.buildCodes();
530         pending.writeBits(literalTree.numCodes - 257, 5);
531         pending.writeBits(distTree.numCodes - 1, 5);
532         pending.writeBits(blTreeCodes - 4, 4);
533         for (int rank = 0; rank < blTreeCodes; rank++)
534             pending.writeBits(blTree.length[BL_ORDER[rank]], 3);
535         literalTree.writeTree(blTree);
536         distTree.writeTree(blTree);
537         if (DeflaterConstants.DEBUGGING)
538             blTree.checkEmpty();
539     }
540 
541     public void compressBlock() {
542         for (int i = 0; i < last_lit; i++) {
543             int litlen = l_buf[i] & 0xff;
544             int dist = d_buf[i];
545             if (dist-- != 0) {
546                 if (DeflaterConstants.DEBUGGING)
547                     System.err.print(
548                         "[" + (dist + 1) + "," + (litlen + 3) + "]: ");
549 
550                 int lc = l_code(litlen);
551                 literalTree.writeSymbol(lc);
552 
553                 int bits = (lc - 261) / 4;
554                 if (bits > 0 && bits <= 5)
555                     pending.writeBits(litlen & ((1 << bits) - 1), bits);
556 
557                 int dc = d_code(dist);
558                 distTree.writeSymbol(dc);
559 
560                 bits = dc / 2 - 1;
561                 if (bits > 0)
562                     pending.writeBits(dist & ((1 << bits) - 1), bits);
563             } else {
564                 if (DeflaterConstants.DEBUGGING) {
565                     if (litlen > 32 && litlen < 127)
566                         System.err.print("(" + (char) litlen + "): ");
567                     else
568                         System.err.print("{" + litlen + "}: ");
569                 }
570                 literalTree.writeSymbol(litlen);
571             }
572         }
573         if (DeflaterConstants.DEBUGGING)
574             System.err.print("EOF: ");
575         literalTree.writeSymbol(EOF_SYMBOL);
576         if (DeflaterConstants.DEBUGGING) {
577             literalTree.checkEmpty();
578             distTree.checkEmpty();
579         }
580     }
581 
582     public void flushStoredBlock(
583         byte[] stored,
584         int stored_offset,
585         int stored_len,
586         boolean lastBlock) {
587         if (DeflaterConstants.DEBUGGING)
588             System.err.println("Flushing stored block " + stored_len);
589         pending.writeBits(
590             (DeflaterConstants.STORED_BLOCK << 1) + (lastBlock ? 1 : 0),
591             3);
592         pending.alignToByte();
593         pending.writeShort(stored_len);
594         pending.writeShort(~stored_len);
595         pending.writeBlock(stored, stored_offset, stored_len);
596         reset();
597     }
598 
599     public void flushBlock(
600         byte[] stored,
601         int stored_offset,
602         int stored_len,
603         boolean lastBlock) {
604         literalTree.freqs[EOF_SYMBOL]++;
605 
606         /* Build trees */
607         literalTree.buildTree();
608         distTree.buildTree();
609 
610         /* Calculate bitlen frequency */
611         literalTree.calcBLFreq(blTree);
612         distTree.calcBLFreq(blTree);
613 
614         /* Build bitlen tree */
615         blTree.buildTree();
616 
617         int blTreeCodes = 4;
618         for (int i = 18; i > blTreeCodes; i--) {
619             if (blTree.length[BL_ORDER[i]] > 0)
620                 blTreeCodes = i + 1;
621         }
622         int opt_len =
623             14
624                 + blTreeCodes * 3
625                 + blTree.getEncodedLength()
626                 + literalTree.getEncodedLength()
627                 + distTree.getEncodedLength()
628                 + extra_bits;
629 
630         int static_len = extra_bits;
631         for (int i = 0; i < LITERAL_NUM; i++)
632             static_len += literalTree.freqs[i] * staticLLength[i];
633         for (int i = 0; i < DIST_NUM; i++)
634             static_len += distTree.freqs[i] * staticDLength[i];
635         if (opt_len >= static_len) {
636             /* Force static trees */
637             opt_len = static_len;
638         }
639 
640         if (stored_offset >= 0 && stored_len + 4 < opt_len >> 3) {
641             /* Store Block */
642             if (DeflaterConstants.DEBUGGING)
643                 System.err.println(
644                     "Storing, since "
645                         + stored_len
646                         + " < "
647                         + opt_len
648                         + " <= "
649                         + static_len);
650             flushStoredBlock(stored, stored_offset, stored_len, lastBlock);
651         } else if (opt_len == static_len) {
652             /* Encode with static tree */
653             pending.writeBits(
654                 (DeflaterConstants.STATIC_TREES << 1) + (lastBlock ? 1 : 0),
655                 3);
656             literalTree.setStaticCodes(staticLCodes, staticLLength);
657             distTree.setStaticCodes(staticDCodes, staticDLength);
658             compressBlock();
659             reset();
660         } else {
661             /* Encode with dynamic tree */
662             pending.writeBits(
663                 (DeflaterConstants.DYN_TREES << 1) + (lastBlock ? 1 : 0),
664                 3);
665             sendAllTrees(blTreeCodes);
666             compressBlock();
667             reset();
668         }
669     }
670 
671     public final boolean isFull() {
672         return last_lit == BUFSIZE;
673     }
674 
675     public final boolean tallyLit(int lit) {
676         if (DeflaterConstants.DEBUGGING) {
677             if (lit > 32 && lit < 127)
678                 System.err.println("(" + (char) lit + ")");
679             else
680                 System.err.println("{" + lit + "}");
681         }
682         d_buf[last_lit] = 0;
683         l_buf[last_lit++] = (byte) lit;
684         literalTree.freqs[lit]++;
685         return last_lit == BUFSIZE;
686     }
687 
688     public final boolean tallyDist(int dist, int len) {
689         if (DeflaterConstants.DEBUGGING)
690             System.err.println("[" + dist + "," + len + "]");
691 
692         d_buf[last_lit] = (short) dist;
693         l_buf[last_lit++] = (byte) (len - 3);
694 
695         int lc = l_code(len - 3);
696         literalTree.freqs[lc]++;
697         if (lc >= 265 && lc < 285)
698             extra_bits += (lc - 261) / 4;
699 
700         int dc = d_code(dist - 1);
701         distTree.freqs[dc]++;
702         if (dc >= 4)
703             extra_bits += dc / 2 - 1;
704         return last_lit == BUFSIZE;
705     }
706 
707 }